package com.sweetdream.statistics

import com.sweetdream.profile.Person
import org.apache.flink.api.scala.{ExecutionEnvironment, _}

/**
 * Title: Single-attribute statistics
 * Description: Counts records per distinct value of a single attribute (sex, survived).
 * Date 2020/12/10
 */
object SingleCount {
  /** Classpath location of the CSV input read by `main`. */
  private val TrainResource = "/train.csv"

  /**
   * Batch entry point: reads the CSV resource into `Person` records, counts
   * records per distinct `getSex` value and per distinct `getSurvived` value,
   * and prints both result sets to standard output.
   *
   * @param args command-line arguments (unused)
   * @throws IllegalStateException if the CSV resource is not on the classpath
   */
  def main(args: Array[String]): Unit = {
    import java.nio.file.Paths

    // 1. env
    val env = ExecutionEnvironment.getExecutionEnvironment

    // 2. source
    // getResource returns null for a missing resource, so wrap it in Option and
    // fail with an explicit message instead of a bare NullPointerException.
    // Going through toURI yields a decoded filesystem path; URL.getPath would
    // keep percent-escapes (e.g. "%20" for spaces) and point at a non-existent file.
    val csvPath = Option(getClass.getResource(TrainResource))
      .map(url => Paths.get(url.toURI).toString)
      .getOrElse(throw new IllegalStateException(
        s"Resource '$TrainResource' was not found on the classpath"))

    // The header line is skipped; columns are bound to Person fields by name, in order.
    // NOTE(review): no quoteCharacter is configured — if the file contains quoted
    // fields with embedded commas, the columns would shift. Confirm against the data.
    val data = env.readCsvFile[Person](csvPath, ignoreFirstLine = true,
      pojoFields = Array("PassengerId", "Survived", "Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked"))

    // 3. transform
    // TODO: further statistics still to be added (e.g. counts per age group).
    // Emit (key, 1) per record, group on the key (tuple field 0), sum the ones (field 1).
    val sexCount = data.map(x => (x.getSex, 1)).groupBy(0).sum(1)
    val survivedCount = data.map(x => (x.getSurvived, 1)).groupBy(0).sum(1)

    // 4. sink
    // print() on a DataSet is an eager sink, so no explicit env.execute() follows.
    sexCount.print()
    survivedCount.print()
  }
}
